# ----------------------------------------------------------------------
#  Project: Sharing Spaces: Segregation, Integration, and Intergroup Relations
#  Publication: APSA Comparative Politics Newsletter (Spring 2020)
#  Last updated: Fri Apr 17 13:35:30 2020
#  Purpose: Create histograms and maps for Figure 1
#  Inputs:"berlin.csv", "grid_link_DEU.csv", "grid_link_FRA.csv", "grid_link_GBR.csv",
#         "london.csv","paris.csv" 
#  Outputs: 3 Maps (Berlin, Paris, London), and 3 histograms (Berlin, Paris, London)
#  Machine: Chagai's macbook pro
# ----------------------------------------------------------------------

# Note: open this script from within the R project so that the relative
# "CSV/..." paths used below resolve against the project root.


# Load Relevant Packages

library("data.table")
library("diverse")
library("sp")
library("ggplot2")
library("reshape2")
library("beepr")

#############################################
######### Berlin ############################
#############################################

# Read and clean Berlin census data
# `berlin` is the census extract; the reshape below relies on its GRID_ID,
# origin and pop columns. `berlin_grid` is the grid lookup that is merged onto
# the entropy scores later in the script.
berlin <- read.csv("CSV/berlin.csv")
berlin_grid <- read.csv("CSV/grid_link_DEU.csv")

# Reshape to wide: one row per GRID_ID, one column per origin, cells = pop.
# dcast is namespace-qualified because data.table and reshape2 both export a
# `dcast`; the bare name only resolves to reshape2 by virtue of attach order.
# NOTE(review): if a GRID_ID/origin pair occurs more than once, reshape2 falls
# back to counting rows (with a message) instead of using pop -- confirm the
# input has unique pairs.
df_census_Berlin <- reshape2::dcast(
  berlin,
  GRID_ID ~ origin,
  value.var = "pop"
)

# Grid/origin combinations without a value come back as NA; count them as 0.
df_census_Berlin[is.na(df_census_Berlin)] <- 0

# Move the grid identifier into the row names so only origin columns remain.
row.names(df_census_Berlin) <- df_census_Berlin$GRID_ID
df_census_Berlin$GRID_ID <- NULL
head(df_census_Berlin)



# Create entropy scores for Berlin grid cells
# The census table is transposed so that origins (the categories) sit in the
# rows and grid cells in the columns; category_row = TRUE declares that layout
# to diversity().
diversity_Berlin_by_grid <- diversity(
  t(as.matrix(df_census_Berlin)),
  category_row = TRUE,
  type = "entropy"
)


# Plot histogram of Berlin cell entropy
# The x axis is fixed to [0, 1] with xlim(); ggplot2 drops observations that
# fall outside scale limits (with a warning), so any entropy value above 1
# would not be drawn.
ggplot(diversity_Berlin_by_grid, aes(x = entropy)) +
  # bins = 30 is the value ggplot2 falls back to when none is given; stating
  # it keeps the figure unchanged and avoids the "pick better value" message.
  geom_histogram(bins = 30, colour = "grey30", fill = "grey70", alpha = 0.6) +
  xlim(0, 1) +
  xlab("Cell Entropy") +
  ylab("") +
  # Single theme layer: Times 12pt text, no grid or panel fill, black axes.
  theme(
    text = element_text(size = 12, family = "Times"),
    axis.text.x = element_text(size = 12),
    plot.caption = element_text(size = 12, family = "Times", hjust = -.02),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )



# Set spatial data
# The row names of the diversity() result are used as grid ids; copy them into
# a column so the scores can be joined to the grid lookup.
diversity_Berlin_by_grid$GRID_ID <- row.names(diversity_Berlin_by_grid)

# Left join: keep every scored cell and attach its grid attributes (the
# X_WGS84 / Y_WGS84 columns are used below). TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
diversity_Berlin_by_grid <- merge(
  diversity_Berlin_by_grid, berlin_grid,
  by = "GRID_ID", all.x = TRUE, all.y = FALSE
)

# sp point objects built from the cell coordinates.
# NOTE(review): `sp` and `spdf` are not used by the plotting code in this
# script, and `sp` shares its name with the attached package -- confirm
# whether anything else depends on them.
coords <- cbind(
  diversity_Berlin_by_grid$X_WGS84,
  diversity_Berlin_by_grid$Y_WGS84
)
sp <- SpatialPoints(coords)
spdf <- SpatialPointsDataFrame(coords, diversity_Berlin_by_grid)


# Map theme
# Strips axes, grid lines, panel border and legend, leaving only the plotted
# points on a white panel. Added as the last layer of each city map.
ditch_the_axes <- theme(
  legend.position = "none",
  panel.background = element_rect(colour = "#FFFFFF", fill = "#FFFFFF"),
  panel.border = element_blank(),
  panel.grid = element_blank(),
  axis.line = element_blank(),
  axis.ticks = element_blank(),
  axis.text = element_blank(),
  axis.title = element_blank(),
  text = element_text(family = "Times", size = 10)
)


# Plot Berlin map
# One point per grid cell at its X_WGS84 / Y_WGS84 position, shaded on a grey
# gradient from near-white (low entropy) to near-black (high entropy).
ggplot(
  data = diversity_Berlin_by_grid,
  mapping = aes(x = X_WGS84, y = Y_WGS84)
) +
  geom_point(mapping = aes(colour = entropy), size = 0.8, alpha = 0.8) +
  scale_colour_gradient(high = "gray1", low = "gray99") +
  labs(colour = "Entropy") +
  ditch_the_axes



#############################################
######### Paris ############################
#############################################


# Read and clean Paris census data
# `paris` is the census extract; the reshape below relies on its GRID_ID,
# origin and pop columns. `paris_grid` is the grid lookup that is merged onto
# the entropy scores later in the script.
paris <- read.csv("CSV/paris.csv")
paris_grid <- read.csv("CSV/grid_link_FRA.csv")

# Reshape to wide: one row per GRID_ID, one column per origin, cells = pop.
# dcast is namespace-qualified because data.table and reshape2 both export a
# `dcast`; the bare name only resolves to reshape2 by virtue of attach order.
# NOTE(review): if a GRID_ID/origin pair occurs more than once, reshape2 falls
# back to counting rows (with a message) instead of using pop -- confirm the
# input has unique pairs.
df_census_Paris <- reshape2::dcast(
  paris,
  GRID_ID ~ origin,
  value.var = "pop"
)

# Grid/origin combinations without a value come back as NA; count them as 0.
df_census_Paris[is.na(df_census_Paris)] <- 0

# Move the grid identifier into the row names so only origin columns remain.
row.names(df_census_Paris) <- df_census_Paris$GRID_ID
df_census_Paris$GRID_ID <- NULL
head(df_census_Paris)

# Create entropy index for Paris grid cells
# The census table is transposed so that origins (the categories) sit in the
# rows and grid cells in the columns; category_row = TRUE declares that layout
# to diversity().
diversity_Paris_by_grid <- diversity(
  t(as.matrix(df_census_Paris)),
  category_row = TRUE,
  type = "entropy"
)


# Plot histogram of Paris cell entropy
# The x axis is fixed to [0, 1] with xlim(); ggplot2 drops observations that
# fall outside scale limits (with a warning), so any entropy value above 1
# would not be drawn.
ggplot(diversity_Paris_by_grid, aes(x = entropy)) +
  # bins = 30 is the value ggplot2 falls back to when none is given; stating
  # it keeps the figure unchanged and avoids the "pick better value" message.
  geom_histogram(bins = 30, colour = "grey30", fill = "grey70", alpha = 0.6) +
  xlim(0, 1) +
  xlab("Cell Entropy") +
  ylab("") +
  # Single theme layer: Times 12pt text, no grid or panel fill, black axes.
  theme(
    text = element_text(size = 12, family = "Times"),
    axis.text.x = element_text(size = 12),
    plot.caption = element_text(size = 12, family = "Times", hjust = -.02),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )


# Edit spatial data
# The row names of the diversity() result are used as grid ids; copy them into
# a column so the scores can be joined to the grid lookup.
diversity_Paris_by_grid$GRID_ID <- row.names(diversity_Paris_by_grid)

# Left join: keep every scored cell and attach its grid attributes (the
# X_WGS84 / Y_WGS84 columns are used below). TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
diversity_Paris_by_grid <- merge(
  diversity_Paris_by_grid, paris_grid,
  by = "GRID_ID", all.x = TRUE, all.y = FALSE
)

# sp point objects built from the cell coordinates.
# NOTE(review): `sp` and `spdf` are not used by the plotting code in this
# script, and `sp` shares its name with the attached package -- confirm
# whether anything else depends on them.
coords <- cbind(
  diversity_Paris_by_grid$X_WGS84,
  diversity_Paris_by_grid$Y_WGS84
)
sp <- SpatialPoints(coords)
spdf <- SpatialPointsDataFrame(coords, diversity_Paris_by_grid)


# Plot Paris map
# One point per grid cell at its X_WGS84 / Y_WGS84 position, shaded on a grey
# gradient from near-white (low entropy) to near-black (high entropy).
ggplot(
  data = diversity_Paris_by_grid,
  mapping = aes(x = X_WGS84, y = Y_WGS84)
) +
  geom_point(mapping = aes(colour = entropy), size = 0.8, alpha = 0.8) +
  scale_colour_gradient(high = "grey1", low = "grey99") +
  labs(colour = "Entropy") +
  ditch_the_axes





#############################################
######### London ############################
#############################################



# Read and clean London census data
# `london` is the census extract; the reshape below relies on its GRID_ID,
# origin and pop columns. `london_grid` is the grid lookup that is merged onto
# the entropy scores later in the script.
london <- read.csv("CSV/london.csv")
london_grid <- read.csv("CSV/grid_link_GBR.csv")

# Reshape to wide: one row per GRID_ID, one column per origin, cells = pop.
# dcast is namespace-qualified because data.table and reshape2 both export a
# `dcast`; the bare name only resolves to reshape2 by virtue of attach order.
# NOTE(review): if a GRID_ID/origin pair occurs more than once, reshape2 falls
# back to counting rows (with a message) instead of using pop -- confirm the
# input has unique pairs.
df_census_London <- reshape2::dcast(
  london,
  GRID_ID ~ origin,
  value.var = "pop"
)

# Grid/origin combinations without a value come back as NA; count them as 0.
df_census_London[is.na(df_census_London)] <- 0

# Move the grid identifier into the row names so only origin columns remain.
row.names(df_census_London) <- df_census_London$GRID_ID
df_census_London$GRID_ID <- NULL
head(df_census_London)

# Create entropy index for London grid cells
# The census table is transposed so that origins (the categories) sit in the
# rows and grid cells in the columns; category_row = TRUE declares that layout
# to diversity().
diversity_London_by_grid <- diversity(
  t(as.matrix(df_census_London)),
  category_row = TRUE,
  type = "entropy"
)

# Plot histogram of London cell entropy
# The x axis is fixed to [0, 1] with xlim(); ggplot2 drops observations that
# fall outside scale limits (with a warning), so any entropy value above 1
# would not be drawn.
ggplot(diversity_London_by_grid, aes(x = entropy)) +
  # bins = 30 is the value ggplot2 falls back to when none is given; stating
  # it keeps the figure unchanged and avoids the "pick better value" message.
  geom_histogram(bins = 30, colour = "grey30", fill = "grey70", alpha = 0.6) +
  xlim(0, 1) +
  xlab("Cell Entropy") +
  ylab("") +
  # Single theme layer: Times 12pt text, no grid or panel fill, black axes.
  theme(
    text = element_text(size = 12, family = "Times"),
    axis.text.x = element_text(size = 12),
    plot.caption = element_text(size = 12, family = "Times", hjust = -.02),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )




# Modify spatial data
# The row names of the diversity() result are used as grid ids; copy them into
# a column so the scores can be joined to the grid lookup.
diversity_London_by_grid$GRID_ID <- row.names(diversity_London_by_grid)

# Left join: keep every scored cell and attach its grid attributes (the
# X_WGS84 / Y_WGS84 columns are used below). TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
diversity_London_by_grid <- merge(
  diversity_London_by_grid, london_grid,
  by = "GRID_ID", all.x = TRUE, all.y = FALSE
)

# sp point objects built from the cell coordinates.
# NOTE(review): `sp` and `spdf` are not used by the plotting code in this
# script, and `sp` shares its name with the attached package -- confirm
# whether anything else depends on them.
coords <- cbind(
  diversity_London_by_grid$X_WGS84,
  diversity_London_by_grid$Y_WGS84
)
sp <- SpatialPoints(coords)
spdf <- SpatialPointsDataFrame(coords, diversity_London_by_grid)

# Plot London map
# One point per grid cell at its X_WGS84 / Y_WGS84 position, shaded on a grey
# gradient from near-white (low entropy) to near-black (high entropy).
ggplot(
  data = diversity_London_by_grid,
  mapping = aes(x = X_WGS84, y = Y_WGS84)
) +
  geom_point(mapping = aes(colour = entropy), size = 0.8, alpha = 0.8) +
  scale_colour_gradient(high = "grey1", low = "grey99") +
  labs(colour = "Entropy") +
  ditch_the_axes


